Reading and Cleaning Data For Car Crashes in Indiana

library(ggplot2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(readxl)

crash_indie <- read_excel("./IndianaFatalCrashes.xlsx")
weather_indie <- read_excel("./book1.xlsx")
colnames(crash_indie) <- c("Time", "Crash")

crash_indie$Time <- as.Date(crash_indie$Time)
weather_indie$Month <- as.Date(weather_indie$Month)
weather_indie <- weather_indie[weather_indie$Month >= as.Date("1999-01-01"), ]
weather_indie <- weather_indie[weather_indie$Month <= as.Date("2022-12-01"), ]


final_df <- cbind(crash_indie, weather_indie)
final_df <- final_df[, -3]
final_df <- final_df[final_df$Time <= as.Date("2019-12-01"), ]

final_covid <- final_df[final_df$Time <= as.Date("2019-12-01"), ]

Testing for Random Walk

final.ts <- ts(final_df$Crash, start =c(1999, 1), end = c(2022, 12), freq = 12)
final.covid <- ts(final_covid$Crash, start =c(1999, 1), end = c(2019, 12), freq = 12)

nValid <- 48
nTrain <- length(final.covid) - nValid
train.ts <- window(final.covid, start = c(1999,1), end = c(1999,nTrain))
valid.ts <- window(final.covid, start=c(1999, nTrain+1), end = c(1999, nTrain+nValid))

Arima(final.covid, order = c(1, 0, 0))
## Series: final.covid 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     mean
##       0.5192  63.7163
## s.e.  0.0538   1.5059
## 
## sigma^2 = 134.3:  log likelihood = -974.15
## AIC=1954.3   AICc=1954.4   BIC=1964.89
diff.covid <- diff(final.covid)
Acf(diff.covid)

Acf(final.covid)

autoplot(final.covid) + ggtitle("Time Series of Final Car Crashes in Indiana") + ylab("Number of Crashes")

snaive.pred <- snaive(train.ts, h = nValid, level = 0)
library(ggplot2)
library(scales)  

covid_df <- data.frame(
  Date = seq(as.Date("1999-01-01"), by = "month", length.out = length(final.covid)),  
  Value = as.numeric(final.covid)
)

split_date <- covid_df$Date[nTrain + 1]

ggplot(covid_df, aes(x = Date, y = Value)) +
  geom_line() +
  geom_vline(xintercept = as.numeric(split_date), 
             linetype = "dashed", color = "red", linewidth = 1) + 
  annotate("text", x = as.Date("2008-01-01"), y = max(covid_df$Value), 
           label = "Training", fontface = "bold", size = 5) +
  annotate("text", x = as.Date("2017-01-01"), y = max(covid_df$Value), 
           label = "Validation", fontface = "bold", size = 5) +
  annotate("segment", x = as.Date("2000-01-01"), xend = as.Date("2015-12-01"), 
           y = max(covid_df$Value) * 0.95, 
           yend = max(covid_df$Value) * 0.95, 
           arrow = arrow(type = "closed", length = unit(0.2, "inches"))) +  
  annotate("segment", x = as.Date("2016-01-01"), xend = as.Date("2019-12-01"), 
           y = max(covid_df$Value) * 0.95, 
           yend = max(covid_df$Value) * 0.95, 
           arrow = arrow(type = "closed", length = unit(0.2, "inches"))) + 
  scale_x_date(date_breaks = "2 years", date_labels = "%Y") + 
  ggtitle("Data Partitioning: Training & Validation") +
  xlab("Time") + ylab("Crashes") +
  theme_minimal() + 
  theme(panel.grid = element_blank()) 

Monthly_mean <- tapply(final.covid, cycle(final.covid ), mean)
plot(Monthly_mean, xlab = "Months", ylab = "Average Car Crashes", type = "l", xaxt = "n")
axis(1, at = c(1:12), labels = c("Jan", "Feb", "Mar", "Apr", "May", "June", "July", "Aug", "Sep", "Oct", "Nov", "Dec")) 

Linear Model Prediction

linear_mod <- tslm(train.ts ~ trend + I(trend^2) + season)
linear_mod_pred <- forecast(linear_mod, h= nValid, level = 0) 

autoplot(linear_mod_pred) +
  autolayer(valid.ts, series = "Observed") + ggtitle("Linear Quadratics")

cube_mod <- tslm(train.ts ~ trend + I(trend^2) + I(trend^3)+ season)
cube_mod_pred <- forecast(cube_mod, h= nValid, level = 0) 

autoplot(cube_mod_pred) +
  autolayer(valid.ts, series = "Observed") + ggtitle("Linear Cubic")

holt.mod <- ets(train.ts, model = "ZZZ")
holt.pred <- forecast(holt.mod, h = nValid, level = 0)

autoplot(holt.pred) + 
  autolayer(valid.ts, series = "Observed")

train.auto.arima <- auto.arima(train.ts)
arima.auto.forecast <- forecast(train.auto.arima, h = nValid, level = 0)

autoplot(arima.auto.forecast) +
  autolayer(valid.ts, series = "Observed")

set.seed(2402)

p <- 13
P <- 2
size <- 3
model <- nnetar(train.ts, repeats = 40, p = p, P = P, size = size)
crash.nnetar.forecast <- forecast(model, h = nValid)
autoplot(crash.nnetar.forecast) +
  autolayer(valid.ts, series = "Observed") 

set.seed(2402)
library(forecast)
p_values <- c(8,9,10,11,12,13,14,15)
P_values <- c(1,2,3,4,5)
size_values <- c(1,2,3,4,5)

best_rmse <- Inf
best_params <- c(NA, NA, NA)
results <- data.frame(p = integer(), P = integer(), size = integer(), RMSE = numeric())

accuracy_NN = NA

for (p in p_values) {
  for (P in P_values) {
    for (size in size_values) {
      tryCatch({
        model <- nnetar(train.ts, repeats = 40, p = p, P = P, size = size)
        forcasted_values <- forecast(model, h = nValid)
        accuracy_NN <- accuracy(forcasted_values, valid.ts)
        rmse <- accuracy_NN["Test set", "RMSE"]
        results <- rbind(results, data.frame(p = p, P = P, size = size, RMSE = rmse))
        if (rmse < best_rmse) {
          best_rmse <- rmse
          best_params <- c(p, P, size)
        }
      }, error = function(e) {
      })
    }
  }
}

which.min(results$RMSE)
p <- 2
d <- 1
q <- 3

train.arima <- Arima(train.ts, order = c(p, d, q))
arima.forecast <- forecast(train.arima, h = nValid)
regressors <- data.frame(reg1 = final_covid$`Average Temperature`,
                         reg2 = final_covid$Precipation)

external_regressors_train <- regressors[1:length(train.ts), ]
external_regressors_valid <- regressors[(length(train.ts) + 1):nrow(regressors), ]
best_model_x <- auto.arima(train.ts, 
                           xreg = as.matrix(external_regressors_train), 
                           seasonal = TRUE,                         
                           stepwise = FALSE,  
                           approximation = FALSE, 
                           trace = TRUE,
                           ic = "aic",
                           lambda = "auto") 
## 
##  Regression with ARIMA(0,1,0)            errors : -41.44068
##  Regression with ARIMA(0,1,0)            errors : -39.44069
##  Regression with ARIMA(0,1,0)(0,0,1)[12] errors : -41.20274
##  Regression with ARIMA(0,1,0)(0,0,1)[12] errors : -39.20284
##  Regression with ARIMA(0,1,0)(0,0,2)[12] errors : -39.20275
##  Regression with ARIMA(0,1,0)(0,0,2)[12] errors : -37.20285
##  Regression with ARIMA(0,1,0)(1,0,0)[12] errors : -41.21909
##  Regression with ARIMA(0,1,0)(1,0,0)[12] errors : -39.21919
##  Regression with ARIMA(0,1,0)(1,0,1)[12] errors : -41.46199
##  Regression with ARIMA(0,1,0)(1,0,1)[12] errors : -39.46224
##  Regression with ARIMA(0,1,0)(1,0,2)[12] errors : -39.46666
##  Regression with ARIMA(0,1,0)(1,0,2)[12] errors : -37.46692
##  Regression with ARIMA(0,1,0)(2,0,0)[12] errors : -39.23627
##  Regression with ARIMA(0,1,0)(2,0,0)[12] errors : -37.23637
##  Regression with ARIMA(0,1,0)(2,0,1)[12] errors : -39.46534
##  Regression with ARIMA(0,1,0)(2,0,1)[12] errors : -37.46559
##  Regression with ARIMA(0,1,0)(2,0,2)[12] errors : -37.91349
##  Regression with ARIMA(0,1,0)(2,0,2)[12] errors : -35.91371
##  Regression with ARIMA(0,1,1)            errors : -126.4305
##  Regression with ARIMA(0,1,1)            errors : -124.4339
##  Regression with ARIMA(0,1,1)(0,0,1)[12] errors : -125.0234
##  Regression with ARIMA(0,1,1)(0,0,1)[12] errors : -123.027
##  Regression with ARIMA(0,1,1)(0,0,2)[12] errors : -129.9174
##  Regression with ARIMA(0,1,1)(0,0,2)[12] errors : -127.9203
##  Regression with ARIMA(0,1,1)(1,0,0)[12] errors : -125.2539
##  Regression with ARIMA(0,1,1)(1,0,0)[12] errors : -123.2575
##  Regression with ARIMA(0,1,1)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(0,1,1)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(0,1,1)(1,0,2)[12] errors : Inf
##  Regression with ARIMA(0,1,1)(1,0,2)[12] errors : Inf
##  Regression with ARIMA(0,1,1)(2,0,0)[12] errors : -130.4001
##  Regression with ARIMA(0,1,1)(2,0,0)[12] errors : -128.405
##  Regression with ARIMA(0,1,1)(2,0,1)[12] errors : Inf
##  Regression with ARIMA(0,1,1)(2,0,1)[12] errors : Inf
##  Regression with ARIMA(0,1,1)(2,0,2)[12] errors : Inf
##  Regression with ARIMA(0,1,1)(2,0,2)[12] errors : Inf
##  Regression with ARIMA(0,1,2)            errors : -128.4303
##  Regression with ARIMA(0,1,2)            errors : -126.4303
##  Regression with ARIMA(0,1,2)(0,0,1)[12] errors : -127.4435
##  Regression with ARIMA(0,1,2)(0,0,1)[12] errors : -125.4436
##  Regression with ARIMA(0,1,2)(0,0,2)[12] errors : -129.9252
##  Regression with ARIMA(0,1,2)(0,0,2)[12] errors : -127.9254
##  Regression with ARIMA(0,1,2)(1,0,0)[12] errors : -127.7679
##  Regression with ARIMA(0,1,2)(1,0,0)[12] errors : -125.7679
##  Regression with ARIMA(0,1,2)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(0,1,2)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(0,1,2)(1,0,2)[12] errors : Inf
##  Regression with ARIMA(0,1,2)(1,0,2)[12] errors : Inf
##  Regression with ARIMA(0,1,2)(2,0,0)[12] errors : -130.637
##  Regression with ARIMA(0,1,2)(2,0,0)[12] errors : -128.6376
##  Regression with ARIMA(0,1,2)(2,0,1)[12] errors : Inf
##  Regression with ARIMA(0,1,2)(2,0,1)[12] errors : Inf
##  Regression with ARIMA(0,1,3)            errors : -126.4581
##  Regression with ARIMA(0,1,3)            errors : -124.4581
##  Regression with ARIMA(0,1,3)(0,0,1)[12] errors : -125.4814
##  Regression with ARIMA(0,1,3)(0,0,1)[12] errors : -123.4814
##  Regression with ARIMA(0,1,3)(0,0,2)[12] errors : -128.0228
##  Regression with ARIMA(0,1,3)(0,0,2)[12] errors : -126.0235
##  Regression with ARIMA(0,1,3)(1,0,0)[12] errors : -125.803
##  Regression with ARIMA(0,1,3)(1,0,0)[12] errors : -123.8031
##  Regression with ARIMA(0,1,3)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(0,1,3)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(0,1,3)(2,0,0)[12] errors : -128.7321
##  Regression with ARIMA(0,1,3)(2,0,0)[12] errors : -126.7335
##  Regression with ARIMA(0,1,4)            errors : -125.3671
##  Regression with ARIMA(0,1,4)            errors : -123.3687
##  Regression with ARIMA(0,1,4)(0,0,1)[12] errors : -124.2996
##  Regression with ARIMA(0,1,4)(0,0,1)[12] errors : -122.3017
##  Regression with ARIMA(0,1,4)(1,0,0)[12] errors : -124.5821
##  Regression with ARIMA(0,1,4)(1,0,0)[12] errors : -122.5842
##  Regression with ARIMA(0,1,5)            errors : -125.7405
##  Regression with ARIMA(0,1,5)            errors : -123.7405
##  Regression with ARIMA(1,1,0)            errors : -77.29271
##  Regression with ARIMA(1,1,0)            errors : -75.29302
##  Regression with ARIMA(1,1,0)(0,0,1)[12] errors : -75.83892
##  Regression with ARIMA(1,1,0)(0,0,1)[12] errors : -73.8395
##  Regression with ARIMA(1,1,0)(0,0,2)[12] errors : -74.47445
##  Regression with ARIMA(1,1,0)(0,0,2)[12] errors : -72.47486
##  Regression with ARIMA(1,1,0)(1,0,0)[12] errors : -75.90718
##  Regression with ARIMA(1,1,0)(1,0,0)[12] errors : -73.9078
##  Regression with ARIMA(1,1,0)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(1,1,0)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(1,1,0)(1,0,2)[12] errors : -76.10588
##  Regression with ARIMA(1,1,0)(1,0,2)[12] errors : -74.10678
##  Regression with ARIMA(1,1,0)(2,0,0)[12] errors : -74.73827
##  Regression with ARIMA(1,1,0)(2,0,0)[12] errors : -72.73875
##  Regression with ARIMA(1,1,0)(2,0,1)[12] errors : -75.98603
##  Regression with ARIMA(1,1,0)(2,0,1)[12] errors : -73.98698
##  Regression with ARIMA(1,1,0)(2,0,2)[12] errors : -74.88363
##  Regression with ARIMA(1,1,0)(2,0,2)[12] errors : -72.88414
##  Regression with ARIMA(1,1,1)            errors : -128.5042
##  Regression with ARIMA(1,1,1)            errors : -126.5042
##  Regression with ARIMA(1,1,1)(0,0,1)[12] errors : -127.5326
##  Regression with ARIMA(1,1,1)(0,0,1)[12] errors : -125.5328
##  Regression with ARIMA(1,1,1)(0,0,2)[12] errors : -129.8243
##  Regression with ARIMA(1,1,1)(0,0,2)[12] errors : -127.8244
##  Regression with ARIMA(1,1,1)(1,0,0)[12] errors : -127.8507
##  Regression with ARIMA(1,1,1)(1,0,0)[12] errors : -125.8509
##  Regression with ARIMA(1,1,1)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(1,1,1)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(1,1,1)(1,0,2)[12] errors : Inf
##  Regression with ARIMA(1,1,1)(1,0,2)[12] errors : Inf
##  Regression with ARIMA(1,1,1)(2,0,0)[12] errors : -130.5239
##  Regression with ARIMA(1,1,1)(2,0,0)[12] errors : -128.5243
##  ARIMA(1,1,1)(2,0,1)[12]                    : Inf
##  Regression with ARIMA(1,1,1)(2,0,1)[12] errors : Inf
##  Regression with ARIMA(1,1,2)            errors : -126.5061
##  Regression with ARIMA(1,1,2)            errors : -124.5062
##  Regression with ARIMA(1,1,2)(0,0,1)[12] errors : -125.5334
##  Regression with ARIMA(1,1,2)(0,0,1)[12] errors : -123.5336
##  Regression with ARIMA(1,1,2)(0,0,2)[12] errors : -132.1333
##  Regression with ARIMA(1,1,2)(0,0,2)[12] errors : -130.1347
##  ARIMA(1,1,2)(1,0,0)[12]                    : Inf
##  Regression with ARIMA(1,1,2)(1,0,0)[12] errors : -123.851
##  Regression with ARIMA(1,1,2)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(1,1,2)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(1,1,2)(2,0,0)[12] errors : Inf
##  Regression with ARIMA(1,1,2)(2,0,0)[12] errors : Inf
##  Regression with ARIMA(1,1,3)            errors : -125.3485
##  Regression with ARIMA(1,1,3)            errors : -123.366
##  Regression with ARIMA(1,1,3)(0,0,1)[12] errors : -123.9711
##  Regression with ARIMA(1,1,3)(0,0,1)[12] errors : -121.9829
##  Regression with ARIMA(1,1,3)(1,0,0)[12] errors : Inf
##  Regression with ARIMA(1,1,3)(1,0,0)[12] errors : Inf
##  Regression with ARIMA(1,1,4)            errors : -128.2081
##  Regression with ARIMA(1,1,4)            errors : -126.2099
##  Regression with ARIMA(2,1,0)            errors : -94.08262
##  Regression with ARIMA(2,1,0)            errors : -92.08316
##  Regression with ARIMA(2,1,0)(0,0,1)[12] errors : -92.38579
##  Regression with ARIMA(2,1,0)(0,0,1)[12] errors : -90.38609
##  Regression with ARIMA(2,1,0)(0,0,2)[12] errors : -93.82953
##  Regression with ARIMA(2,1,0)(0,0,2)[12] errors : -91.83044
##  Regression with ARIMA(2,1,0)(1,0,0)[12] errors : -92.46627
##  Regression with ARIMA(2,1,0)(1,0,0)[12] errors : -90.46652
##  Regression with ARIMA(2,1,0)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(2,1,0)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(2,1,0)(1,0,2)[12] errors : -98.35493
##  Regression with ARIMA(2,1,0)(1,0,2)[12] errors : -96.35486
##  Regression with ARIMA(2,1,0)(2,0,0)[12] errors : -93.94983
##  Regression with ARIMA(2,1,0)(2,0,0)[12] errors : -91.95048
##  Regression with ARIMA(2,1,0)(2,0,1)[12] errors : Inf
##  Regression with ARIMA(2,1,0)(2,0,1)[12] errors : Inf
##  Regression with ARIMA(2,1,1)            errors : -126.5056
##  Regression with ARIMA(2,1,1)            errors : -124.5057
##  Regression with ARIMA(2,1,1)(0,0,1)[12] errors : -125.5332
##  Regression with ARIMA(2,1,1)(0,0,1)[12] errors : -123.5334
##  Regression with ARIMA(2,1,1)(0,0,2)[12] errors : -127.9185
##  Regression with ARIMA(2,1,1)(0,0,2)[12] errors : -125.919
##  Regression with ARIMA(2,1,1)(1,0,0)[12] errors : -125.8508
##  Regression with ARIMA(2,1,1)(1,0,0)[12] errors : -123.851
##  Regression with ARIMA(2,1,1)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(2,1,1)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(2,1,1)(2,0,0)[12] errors : -128.6313
##  Regression with ARIMA(2,1,1)(2,0,0)[12] errors : -126.6323
##  Regression with ARIMA(2,1,2)            errors : Inf
##  Regression with ARIMA(2,1,2)            errors : Inf
##  Regression with ARIMA(2,1,2)(0,0,1)[12] errors : Inf
##  Regression with ARIMA(2,1,2)(0,0,1)[12] errors : Inf
##  Regression with ARIMA(2,1,2)(1,0,0)[12] errors : Inf
##  Regression with ARIMA(2,1,2)(1,0,0)[12] errors : Inf
##  Regression with ARIMA(2,1,3)            errors : Inf
##  Regression with ARIMA(2,1,3)            errors : Inf
##  Regression with ARIMA(3,1,0)            errors : -95.33238
##  Regression with ARIMA(3,1,0)            errors : -93.33564
##  Regression with ARIMA(3,1,0)(0,0,1)[12] errors : -93.82181
##  Regression with ARIMA(3,1,0)(0,0,1)[12] errors : -91.8243
##  Regression with ARIMA(3,1,0)(0,0,2)[12] errors : -96.31651
##  Regression with ARIMA(3,1,0)(0,0,2)[12] errors : -94.32193
##  Regression with ARIMA(3,1,0)(1,0,0)[12] errors : -93.97033
##  Regression with ARIMA(3,1,0)(1,0,0)[12] errors : -91.97265
##  Regression with ARIMA(3,1,0)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(3,1,0)(1,0,1)[12] errors : Inf
##  Regression with ARIMA(3,1,0)(2,0,0)[12] errors : -96.60192
##  Regression with ARIMA(3,1,0)(2,0,0)[12] errors : -94.6066
##  Regression with ARIMA(3,1,1)            errors : -124.6032
##  Regression with ARIMA(3,1,1)            errors : -122.6034
##  Regression with ARIMA(3,1,1)(0,0,1)[12] errors : -123.6112
##  Regression with ARIMA(3,1,1)(0,0,1)[12] errors : -121.6116
##  Regression with ARIMA(3,1,1)(1,0,0)[12] errors : -123.9272
##  Regression with ARIMA(3,1,1)(1,0,0)[12] errors : -121.9276
##  Regression with ARIMA(3,1,2)            errors : -128.0176
##  Regression with ARIMA(3,1,2)            errors : -126.0193
##  Regression with ARIMA(4,1,0)            errors : -101.0856
##  Regression with ARIMA(4,1,0)            errors : -99.09223
##  Regression with ARIMA(4,1,0)(0,0,1)[12] errors : -99.60011
##  Regression with ARIMA(4,1,0)(0,0,1)[12] errors : -97.60553
##  Regression with ARIMA(4,1,0)(1,0,0)[12] errors : -99.74008
##  Regression with ARIMA(4,1,0)(1,0,0)[12] errors : -97.74527
##  Regression with ARIMA(4,1,1)            errors : -125.5139
##  Regression with ARIMA(4,1,1)            errors : -123.5139
##  Regression with ARIMA(5,1,0)            errors : -103.8484
##  Regression with ARIMA(5,1,0)            errors : -101.8577
## 
## 
## 
##  Best model: Regression with ARIMA(1,1,2)(0,0,2)[12] errors
best_model_x <- Arima(train.ts, 
                      order = c(1, 1, 2),  
                      seasonal = list(order = c(0, 0, 1), period = 12),
                      xreg = as.matrix(external_regressors_train))

forecasted_values <- forecast(best_model_x, 
                              xreg = as.matrix(external_regressors_valid), 
                              h = nValid,
                              level = 0)

accuracy(forecasted_values$mean, valid.ts)
##                ME     RMSE      MAE        MPE     MAPE      ACF1 Theil's U
## Test set 1.180597 10.38265 8.745406 -0.8485372 12.86145 0.2219281  0.718482
autoplot(forecasted_values) + 
  autolayer(valid.ts, series = "Observed") + 
  ggtitle("Optimized ARIMAX Forecast") 

forecast.vectors.df <- data.frame(cbind(linear_mod_pred$mean, holt.pred$mean, cube_mod_pred$mean, crash.nnetar.forecast$mean, snaive.pred$mean, forecasted_values$mean))
 
forecast.vectors.df$comb.trimmed.avg <- apply(forecast.vectors.df, 1, function(x) mean(x, trim = 0.2))

comb.trimmed.avg <- ts(forecast.vectors.df$comb.trimmed.avg, start = c(1999, 1), end = c(2019, 12), freq = 12)

autoplot(train.ts) +
  autolayer(comb.trimmed.avg, series = "Trimmed Avg Comb") +
  autolayer(valid.ts, series = "Observed")

Model_Accuracy <- data.frame(rbind(
  accuracy(snaive.pred$mean, valid.ts),
  accuracy(holt.pred$mean, valid.ts),
  accuracy(cube_mod_pred$mean, valid.ts),
  accuracy(linear_mod_pred$mean, valid.ts),
  accuracy(arima.forecast$mean, valid.ts),
  accuracy(arima.auto.forecast$mean, valid.ts),
  accuracy(forecasted_values$mean, valid.ts),
  accuracy(crash.nnetar.forecast$mean, valid.ts),
  accuracy(comb.trimmed.avg, valid.ts)))

rownames(Model_Accuracy) <- c("Seasonal Naive", "Holt Winter", "Exponential Linear Model", "Linear Model", "Arima", " Auto Arima", "Exogeneous Arima", "Neural Network", "Trimmed Average")
p <- 13
P <- 2
size <- 3

future.nnetar <- nnetar(final.ts, repeats = 20, p = p, P = P, size = size)
future.nnetar.forecast <- forecast(future.nnetar, h = 60)

checkresiduals(crash.nnetar.forecast)

## 
##  Ljung-Box test
## 
## data:  Residuals from NNAR(13,2,3)[12]
## Q* = 19.66, df = 24, p-value = 0.716
## 
## Model df: 0.   Total lags used: 24
autoplot(final.ts) + autolayer(future.nnetar.forecast, series = "Prediction") +
  ggtitle("Future Prediction for the Next Four Years") + ylab("Number of Crashes") +theme_minimal()

# autoplot(crash.nnetar.forecast$residuals)
# 
# nnetar_residuals <- residuals(crash.nnetar)
# 
# # Manually plot ACF of residuals
# Acf(nnetar_residuals, main = "ACF of Neural Network Residuals")
# 
# # Histogram of residuals
# hist(nnetar_residuals, main = "Histogram of Residuals", col = "skyblue", border = "black")
# 
# # QQ plot for normality check
# qqnorm(nnetar_residuals)
# qqline(nnetar_residuals, col = "red")
library(readxl)

Michigan <- read_excel("./MichiganCarCrashes.xlsx")
Illinois <- read_excel("./IllinoisCarCrashes.xlsx")
Ohio <- read_excel("./Ohio Crashes.xlsx")

Michigan$Month <- as.Date(Michigan$Month)
Illinois$Month <- as.Date(Illinois$Month)
Ohio$Time <- as.Date(Ohio$Time)

Michigan.ts <- ts(Michigan$Crashes, start =c(1999, 1), end = c(2019, 12), freq = 12)
Illinois.ts <- ts(Illinois$Crash, start =c(1999, 1), end = c(2019, 12), freq = 12)
Ohio.ts <- ts(Ohio$Crashes, start =c(1999, 1), end = c(2019, 12), freq = 12)

Use Nuetral Network to make the time series prediction for the other states as it is our best performing model.

michigan.nnetar <- nnetar(Michigan.ts, repeats =20, p = 15, P = 2, size = 10)
michigan.nnetar.forecast <- forecast(michigan.nnetar, h = 24, level = 0)
autoplot(michigan.nnetar.forecast)

illinois.nnetar <- nnetar(Illinois.ts, repeats =20, p = 10, P = 1, size = 7)
illinois.nnetar.forecast <- forecast(illinois.nnetar, h = 24, level = 0)
autoplot(illinois.nnetar.forecast)

ohio.nnetar <- nnetar(Ohio.ts, repeats =20, p = 5, P = 1, size = 10)
ohio.nnetar.forecast <- forecast(ohio.nnetar, h = 24, level = 0)
autoplot(ohio.nnetar.forecast)